home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Amiga Collections: Taifun
/
Taifun 029 (1987-08-15)(Ossowski, Stefan)(DE)(PD).zip
/
Taifun 029 (1987-08-15)(Ossowski, Stefan)(DE)(PD).adf
/
Utilities
/
FComp
/
FComp.c
< prev
next >
Wrap
C/C++ Source or Header
|
1978-08-10
|
7KB
|
310 lines
/* FCOMP.C - A File Comparison Program
*
* Source: A File Comparison Program
* Webb Miller and Eugene W. Myers
* SOFTWARE - PRACTICE AND EXPERIENCE
* Vol. 15(11), 1025-1040
* November, 1985
* Copyright John Wiley & Sons, Ltd.
*
* Usage: fcomp [-n] file1 file2
*
* where the optional flag "n" is an integer constant
* that limits the size of edit scripts that will be
* considered by fcomp. If all edit scripts changing
* file1 to file2 contain more than "n" insertions
* and deletions, then a message to that effect is
* all that is printed. If no "n" is specified, then
* arbitrarily long scripts may be considered.
*/
/*** Include Files ***/
#include <stdio.h>
/*** Definitions ***/
#define MAXLINES 2000 /* Maximum # of lines in a file */
#define ORIGIN MAXLINES /* Subscript for diagonal zero */
#define INSERT 1
#define DELETE 2
/*** Structures ***/
/* Edit scripts are stored in linked lists */
struct edit
{
struct edit *link; /* Previous edit command */
int op; /* INSERT or DELETE */
int line1; /* Line number in file1 */
int line2; /* Line number in file2 */
};
/*** Global Variables ***/
char *A[MAXLINES], /* Ptrs. to lines of file1 and file2 */
*B[MAXLINES];
/*** Main Body of Program ***/
int main(argc,argv)
int argc;
char **argv;
{
int max_d, /* Bound on size of edit script */
m, /* Number of lines in file1 */
n, /* Number of lines in file2 */
lower, /* Left-most diagonal under consideration */
upper, /* Right-most diagonal under consideration */
d, /* Current edit distance */
k, /* Current diagonal */
row, /* Row number */
col; /* Column number */
/* For each diagonal, two items are saved */
int last_d[2*MAXLINES+1]; /* Row containing last d */
struct edit *script[2*MAXLINES+1]; /* Corresponding edit script */
struct edit *new;
char *malloc();
void put_scr(),
fatal(),
exceed();
if(argc > 1 && argv[1][0] == '-')
{
max_d = atoi(&argv[1][1]);
++argv;
--argc;
}
else
max_d = 2*MAXLINES;
if(argc != 3)
fatal("FCOMP requires two file names.");
/* Read in file1 and file2 */
m = in_file(argv[1],A);
n = in_file(argv[2],B);
/* Initialize: 0 entries in D indicate identical prefixes */
for(row = 0; row < m && row < n && strcmp(A[row],B[row]) == 0; ++row)
;
last_d[ORIGIN] = row;
script[ORIGIN] = NULL;
lower = (row == m) ? ORIGIN + 1 : ORIGIN - 1;
upper = (row == n) ? ORIGIN - 1 : ORIGIN + 1;
if(lower > upper)
{
puts("The files are identical.");
exit(0);
}
/* For each value of the edit distance */
for(d = 1; d <= max_d; ++d)
{
/* For each relevant diagonal */
for(k = lower; k <= upper; k += 2)
{
/* Get space for next edit instruction */
new = (struct edit *)malloc(sizeof(struct edit));
if(!new)
exceed(d);
/* Find a d on diagonal k */
if(k == ORIGIN-d || k != ORIGIN+d && last_d[k+1] >= last_d[k-1])
{
/* Moving down from the last d-1 on diagonal k+1 puts you
* farther along diagonal k than does moving right from the
* last d-1 on diagonal k-1.
*/
row = last_d[k+1]+1;
new->link = script[k+1];
new->op = DELETE;
}
else
{
/* Move right from the last d-1 on diagonal k-1 */
row = last_d[k-1];
new->link = script[k-1];
new->op = INSERT;
}
/* Code common to the two cases */
new->line1 = row;
new->line2 = col = row + k - ORIGIN;
script[k] = new;
/* Slide down the diagonal */
while(row < m && col < n && strcmp(A[row],B[col]) == 0)
{
++row;
++col;
}
last_d[k] = row;
if(row == m && col == n)
{
/* Hit southeast corner, have the answer */
put_scr(script[k]);
exit(0);
}
if(row == m) /* Hit last row, don't look to left */
lower = k+2;
if(col == n) /* Hit last column, don't look to right */
upper = k-2;
}
--lower;
++upper;
}
exceed(d);
}
/*** Functions ***/
/* IN_FILE() - Read in a file and return a count of the lines */
int in_file(filename,P)
char *filename,
*P[];
{
char buf[100],
*malloc(),
*fgets(),
*save,
*b;
FILE *fp,
*fopen();
int lines = 0;
if(!(fp = fopen(filename,"r")))
{
fprintf(stderr,"Cannot open file %s.\n",filename);
exit(1);
}
while(fgets(buf,100,fp))
{
if(lines >= MAXLINES)
fatal("File is too large for FCOMP.");
if(!(save = malloc(strlen(buf)+1)))
fatal("Not enough memory to save the files.");
P[lines++] = save;
for(b = buf; *save++ = *b++; ) /* Copy the line */
;
}
fclose(fp);
return(lines);
}
/* PUT_SCR() - Print the edit script */
void put_scr(start)
struct edit *start;
{
struct edit *ep,
*behind,
*ahead,
*a,
*b;
int change;
/* Reverse the pointers */
ahead = start;
ep = NULL;
while(ahead)
{
behind = ep;
ep = ahead;
ahead = ahead->link;
ep->link = behind; /* Flip the pointer */
}
/* Print the commands */
while(ep)
{
b = ep;
if(ep->op == INSERT)
printf("Inserted after line %d:\n",ep->line1);
else
{
do /* DELETE */
{
/* Look for a block of consecutive deleted lines */
a = b;
b = b->link;
}
while(b && b->op == DELETE && b->line1 == a->line1 + 1);
/* Now b points to the command after the last deletion */
if(change = (b && b->op == INSERT && b->line1 == a->line1))
printf("Changed ");
else
printf("Deleted ");
if(a == ep)
printf("line %d:\n",ep->line1);
else
printf("lines %d-%d:\n",ep->line1,a->line1);
/* Print the deleted lines */
do
{
printf(" %s",A[ep->line1 - 1]);
ep = ep->link;
}
while(ep != b);
if(!change)
continue;
printf("To:\n");
}
/* Print the inserted lines */
do
{
printf(" %s",B[ep->line2 - 1]);
ep = ep->link;
}
while(ep && ep->op == INSERT && ep->line1 == b->line1);
}
}
/* FATAL() - Print error message and die */
void fatal(msg)
char *msg;
{
fprintf(stderr,"%s\n",msg);
exit(1);
}
/* EXCEED() - The difference exceeds "d" */
void exceed(d)
int d;
{
fprintf(stderr,"The files differ in at least %d lines.\n",d);
exit(1);
}
/*** End of FCOMP.C ***/